package crawler.mazida;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zql.entity.AgencyEntity;

import util.HttpConnectionGet;
import util.Location;
import util.MybatisTool;
import util.TianYanCha;
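// NOTE: some endpoints may respond with an empty array "[]" (translated from the original note; TODO confirm which endpoint)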
/**
 * Crawler that collects Changan Mazda dealer records by walking the site's
 * province -&gt; city -&gt; dealer dictionary endpoints and turning every dealer
 * into an {@link AgencyEntity}.
 *
 * <p>Running {@link #main(String[])} crawls all dealers and stores them through
 * {@code MybatisTool}.
 */
public class ChangAnMaZiDaCrawler {
	/** Common prefix of the province/city/dealer dictionary endpoints. */
	private static final String DICTIONARY_URL = "http://www.changan-mazda.com.cn/dictionary/";
	/** Upper bound for establishing a connection, so a dead host cannot hang the crawl. */
	private static final int CONNECT_TIMEOUT_MS = 15000;
	/** Upper bound for waiting on response data. */
	private static final int READ_TIMEOUT_MS = 30000;
	/** Charset used when the response does not declare one in Content-Type. */
	private static final String DEFAULT_CHARSET = "UTF-8";

	/**
	 * Crawls every province, every city of that province and every dealer of
	 * that city, and builds one entity per dealer.
	 *
	 * <p>Cities or dealer lists whose request fails (or that parse to
	 * {@code null}) are skipped; the crawl continues with the next one.
	 *
	 * @return the collected dealers, possibly empty, never {@code null}
	 * @throws RuntimeException wrapping any failure raised while crawling
	 */
	public List<AgencyEntity> getAgency() {
		List<AgencyEntity> list = new ArrayList<AgencyEntity>();
		try {
			String docText = HttpConnectionGet.getJson(DICTIONARY_URL + "province");
			JSONArray proArray = extractProvinces(docText);
			for (int i = 0; i < proArray.size(); i++) {
				JSONObject province = proArray.getJSONObject(i);
				String proName = province.getString("name");
				String proNum = province.getString("id");

				JSONArray citys = fetchArray(DICTIONARY_URL + "city/" + proNum);
				if (citys == null) {
					// getJSON returns null on non-200 / I/O errors; skip this province
					// instead of failing the whole crawl with a NullPointerException.
					continue;
				}
				for (int j = 0; j < citys.size(); j++) {
					JSONObject city = citys.getJSONObject(j);
					String cityName = city.getString("name");
					String cityNum = city.getString("id");

					JSONArray dealerArray = fetchArray(DICTIONARY_URL + "dealer/" + cityNum);
					if (dealerArray == null) {
						continue;
					}
					for (int k = 0; k < dealerArray.size(); k++) {
						list.add(buildAgency(dealerArray.getJSONObject(k), proName, cityName));
					}
				}
			}
		} catch (Exception e) {
			throw new RuntimeException("爬虫异常", e);
		}
		return list;
	}

	/**
	 * Cuts the JSON array out of the province response. The array is located
	 * between the first {@code "[{"} and the following {@code "}];"}, i.e. the
	 * response carries extra text around the array.
	 *
	 * @param docText raw province response, may be {@code null}
	 * @return the parsed province array, never {@code null}
	 * @throws IllegalStateException if the response is missing or has no such array
	 */
	private JSONArray extractProvinces(String docText) {
		if (docText == null) {
			throw new IllegalStateException("province dictionary response is empty");
		}
		int start = docText.indexOf("[{");
		// Search for the terminator after the start marker so an earlier "}];" cannot
		// produce an inverted range.
		int endMarker = start < 0 ? -1 : docText.indexOf("}];", start);
		if (endMarker < 0) {
			throw new IllegalStateException("province dictionary response contains no JSON array");
		}
		// +2 keeps the closing "}]" and drops the trailing ';'.
		JSONArray provinces = JSON.parseArray(docText.substring(start, endMarker + 2));
		if (provinces == null) {
			throw new IllegalStateException("province dictionary array could not be parsed");
		}
		return provinces;
	}

	/**
	 * Downloads {@code url} with {@link #getJSON(String)} and parses it as a JSON array.
	 *
	 * @return the parsed array, or {@code null} when the download failed or the
	 *         parser produced no array
	 */
	private JSONArray fetchArray(String url) {
		String text = getJSON(url);
		if (text == null) {
			return null;
		}
		return JSON.parseArray(text);
	}

	/**
	 * Builds the entity for one dealer JSON object, enriching it with the
	 * results of {@code Location.getLocation} and {@code TianYanCha.getShareholder}.
	 *
	 * @param dealer   dealer object with {@code dea_name}, {@code dea_address}
	 *                 and {@code dea_sale_phone}
	 * @param proName  name of the province being crawled
	 * @param cityName name of the city being crawled
	 */
	private AgencyEntity buildAgency(JSONObject dealer, String proName, String cityName) {
		String name = dealer.getString("dea_name");
		String address = dealer.getString("dea_address");
		String sellTell = dealer.getString("dea_sale_phone");

		// NOTE(review): the contracts of the two lookup helpers are not visible here;
		// a missing or short result is stored as null rather than aborting the crawl.
		String[] lngAndLat = Location.getLocation(address);
		String lng = null;
		String lat = null;
		if (lngAndLat != null && lngAndLat.length >= 2) {
			lng = lngAndLat[0];
			lat = lngAndLat[1];
		}
		List<String> shareholder = TianYanCha.getShareholder(name);
		String sControllingShareholder = null;
		String sOtherShareholders = null;
		if (shareholder != null) {
			if (shareholder.size() > 0) {
				sControllingShareholder = shareholder.get(0);
			}
			if (shareholder.size() > 1) {
				sOtherShareholders = shareholder.get(1);
			}
		}

		AgencyEntity agency = new AgencyEntity();
		agency.setdCloseDate(null);
		agency.setdOpeningDate(null);
		agency.setdUpdateTime(new Timestamp(System.currentTimeMillis()));
		agency.setnBrandID(-1);
		agency.setsBrand("马自达");

		agency.setnDealerIDWeb(-1);
		agency.setnManufacturerID(-1);
		agency.setsManufacturer("长安马自达");

		agency.setnState(1);
		agency.setsAddress(address);
		agency.setsCity(cityName);
		agency.setsCustomerServiceCall(null);
		agency.setsDealerName(name);
		agency.setsDealerType(null);
		agency.setsProvince(proName);
		agency.setsSaleCall(sellTell);
		agency.setsLongitude(lng);
		agency.setsLatitude(lat);
		agency.setsControllingShareholder(sControllingShareholder);
		agency.setsOtherShareholders(sOtherShareholders);
		return agency;
	}

	/**
	 * Issues a GET request that imitates the site's own AJAX call and returns
	 * the response body with line terminators removed.
	 *
	 * @param path absolute URL to fetch
	 * @return the response body, or {@code null} if the URL is malformed, the
	 *         status code is not 200, or an I/O error occurs (the stack trace
	 *         is printed in the error cases)
	 */
	public String getJSON(String path) {
		HttpURLConnection conn = null;
		BufferedReader br = null;

		try {
			URL url = new URL(path);
			conn = (HttpURLConnection) url.openConnection();
			conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
			conn.setReadTimeout(READ_TIMEOUT_MS);
			// Request headers copied from a browser session.
			conn.setRequestProperty("Accept", "application/json, text/javascript, */*; q=0.01");
			// Only gzip is decoded below, so only gzip is advertised.
			conn.setRequestProperty("Accept-Encoding", "gzip");
			conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.5");
			conn.setRequestProperty("Connection", "keep-alive");
			conn.setRequestProperty("Cookie", "_dc3c=1; Hm_lvt_8ac456d3f48e55a4f1db93ee8a3bdae3=1506778569,1506844041; Hm_lpvt_8ac456d3f48e55a4f1db93ee8a3bdae3=1506845962; dmt2=5%7C0%7C0%7Cwww.changan-mazda.com.cn%2Fdealer%7C; dmts2=1; dm2=2%7C1506845962%7C0%7C%7C%7C%7C%7C1506778570%7C1506778570%7C1506778570%7C1506844041%7Cfbeb819f5714ed3e903d11558da3619d%7C0%7C%7C; dcad2=; dc_search2=; CIGDCID=fbeb819f5714ed3e903d11558da3619d; _ga=GA1.3.549206199.1506778570; _gid=GA1.3.125866624.1506778570");
			conn.setRequestProperty("Host", "www.changan-mazda.com.cn");
			conn.setRequestProperty("Referer", "http://www.changan-mazda.com.cn/dealer");
			conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Mobile Safari/537.36");
			conn.setRequestProperty("X-Requested-With", "XMLHttpRequest");
			conn.connect(); // optional: getResponseCode() would connect implicitly
			int code = conn.getResponseCode();
			if (code != 200) {
				return null;
			}

			// Decode with the declared charset instead of the platform default, which
			// would garble Chinese text on machines whose default is not the server's.
			String charset = charsetOf(conn.getContentType());
			if ("gzip".equalsIgnoreCase(conn.getContentEncoding())) {
				br = new BufferedReader(new InputStreamReader(new GZIPInputStream(conn.getInputStream()), charset));
			} else {
				br = new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
			}

			StringBuilder content = new StringBuilder();
			String line;
			while ((line = br.readLine()) != null) {
				content.append(line);
			}
			return content.toString();

		} catch (IOException e) {
			// Also covers MalformedURLException and an unsupported charset name.
			e.printStackTrace();
		} finally {
			if (br != null) {
				try {
					br.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			} else if (conn != null) {
				// No reader took ownership of the response stream (non-200 or early
				// failure): release the connection explicitly.
				conn.disconnect();
			}
		}

		return null;
	}

	/**
	 * Extracts the {@code charset} parameter from a Content-Type header value.
	 *
	 * @param contentType header value such as {@code "application/json; charset=utf-8"},
	 *                    may be {@code null}
	 * @return the declared charset name, or {@code "UTF-8"} when none is declared
	 */
	private static String charsetOf(String contentType) {
		if (contentType != null) {
			String key = "charset=";
			String[] parts = contentType.split(";");
			// parts[0] is the media type itself; parameters start at index 1.
			for (int i = 1; i < parts.length; i++) {
				String part = parts[i].trim();
				if (part.regionMatches(true, 0, key, 0, key.length())) {
					String value = part.substring(key.length()).trim();
					if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
						value = value.substring(1, value.length() - 1);
					}
					if (value.length() > 0) {
						return value;
					}
				}
			}
		}
		return DEFAULT_CHARSET;
	}

	/**
	 * Command-line entry point: crawls all dealers and saves each one through
	 * {@code MybatisTool}.
	 */
	public static void main(String[] args) {
		ChangAnMaZiDaCrawler crawler = new ChangAnMaZiDaCrawler();
		System.out.println("爬虫开始...");
		List<AgencyEntity> agencys = crawler.getAgency();
		System.out.println("抓取完毕,正在存库");
		try {
			for (AgencyEntity agency : agencys) {
				MybatisTool.save(agency);
			}
		} finally {
			// Close even when a save fails part-way through.
			MybatisTool.close();
		}
		System.out.println("请查看数据库");
	}

}
